###
# TRANSIT STOP LINK GENERATION SCRIPT
# v0.0.3 JUN-9-2020
# ---
#
# This script places each bus stop and MRT station in a subzone and
# generates two CSV files: one of bus stops and one of MRT stations.
# The CSVs are meant to be merged (in Stata) with the saved .dta files
# of farecard origin/destination pairs.
###

import os
import sys

import geopandas as gpd
import pandas as pd
import seaborn as sns
from geopandas.tools import sjoin
from tqdm import tqdm
tqdm.pandas()


# import the config file. This config file should contain common paths
# (see my example)
sys.path.append("code/")
sys.path.append("../")
import config  # noqa
sns.set()  # noqa

# Common locations, taken from the shared config module.
git = config.ROOT
make_data = config.MAKE_DATA_PATH
local_path = config.LOCAL_PATH

# Load the subzone boundary shapefile, reproject it to EPSG:4326 and order
# the rows by planning area, then subzone, with a fresh 0..n-1 index.
sg = (
    gpd.read_file(
        git
        + '/data/master-plan-2014-subzone-boundary-no-sea/'
        + 'master-plan-2014-subzone-boundary-no-sea-shp/'
        + 'MP14_SUBZONE_NO_SEA_PL.shp'
    )
    .to_crs(4326)
    .sort_values(["PLN_AREA_N", "SUBZONE_N"])
    .reset_index(drop=True)
)
# Keep only the two name columns and the polygons.
sg = sg[["PLN_AREA_N", "SUBZONE_N", "geometry"]]

# Subzone names, in the same order as the rows of `sg`.
sgNames = list(sg["SUBZONE_N"].values)

###
# merge bus stop coords with LTA bus stop IDs, and MRT station coords
# with station IDs
###


def getBusSubzones(sg=sg):
    """Write a CSV that places each bus stop in a subzone.

    Reads the bus stop shapefile, spatially joins every stop to the subzone
    polygon it lies within, attaches the bus stop IDs from
    ``bus_stop_ids_clean.txt`` and saves the result to
    ``bus_stop_subzones.csv`` inside the ``make_data`` directory. The
    geometry column is replaced by plain ``LON`` / ``LAT`` columns.

    Parameters
    ----------
    sg : geopandas.GeoDataFrame
        Subzone polygons with ``PLN_AREA_N``, ``SUBZONE_N`` and ``geometry``
        columns. Defaults to the module-level ``sg``.

    Returns
    -------
    None
        The function is called for its side effect (the CSV file).
    """
    # Build every input path the same way so the result does not depend on
    # whether config.ROOT ends with a path separator.
    data_dir = os.path.join(git, "data")

    stops = gpd.read_file(
        os.path.join(data_dir, "BusStopLocation_Jan2020", "BusStop.shp")
    ).to_crs(4326)
    # Cast the stop number to int64 so it can be matched against
    # BUS_STOP_CD below (presumably numeric in the IDs file -- TODO confirm).
    stops["BUS_STOP_N"] = stops.BUS_STOP_N.astype("int64")

    # Left join: stops that fall in no subzone are kept with missing
    # subzone / planning-area values.
    # NOTE: `op` is the keyword of the geopandas release this script targets;
    # newer geopandas releases name this argument `predicate`.
    stops_in_zones = sjoin(stops, sg, how="left", op="within").drop(
        ["index_right", "BUS_ROOF_N"], axis=1)

    stop_ids = pd.read_csv(
        os.path.join(data_dir, "bus_stop_ids_clean.txt"),
        sep="\t", header=0)

    # Inner merge: keep only stops present in both the ID list and the
    # shapefile, then drop the now-redundant shapefile stop number.
    dBM = stop_ids.merge(
        stops_in_zones, left_on="BUS_STOP_CD", right_on="BUS_STOP_N",
        how="inner").drop("BUS_STOP_N", axis=1)

    dBM = dBM.rename(
        {"PLN_AREA_N": "planning_area",
         "SUBZONE_N": "subzone"}, axis=1)

    # Store the coordinates as plain columns so the geometry can be dropped
    # before writing the CSV.
    dBM['LON'] = [pt.x for pt in dBM.geometry]
    dBM['LAT'] = [pt.y for pt in dBM.geometry]

    dBM.drop('geometry', axis=1).to_csv(
        os.path.join(make_data, "bus_stop_subzones.csv"))


def getMRTSubzones(sg=sg):
    """Write a CSV that places each MRT/LRT station in a subzone.

    Reads station coordinates from ``stationCoords.csv``, spatially joins
    every station to the subzone polygon it lies within, attaches the
    station IDs from ``station_ids_clean.txt`` and saves the stations that
    received a subzone to ``mrt_station_subzones.csv`` inside the
    ``make_data`` directory. The geometry column is replaced by plain
    ``LON`` / ``LAT`` columns.

    Parameters
    ----------
    sg : geopandas.GeoDataFrame
        Subzone polygons with ``PLN_AREA_N``, ``SUBZONE_N`` and ``geometry``
        columns. Defaults to the module-level ``sg``.

    Returns
    -------
    None
        The function is called for its side effect (the CSV file).
    """
    # Build every input path the same way so the result does not depend on
    # whether config.ROOT ends with a path separator.
    data_dir = os.path.join(git, "data")

    # The first CSV column is skipped (presumably a saved index -- TODO
    # confirm against the file).
    coords = pd.read_csv(
        os.path.join(data_dir, "stationCoords.csv")).iloc[:, 1:]
    stations = gpd.GeoDataFrame(
        coords,
        geometry=gpd.points_from_xy(coords.LON, coords.LAT),
        crs="epsg:4326")
    station_ids = pd.read_csv(os.path.join(data_dir, "station_ids_clean.txt"))

    # which subzone is each MRT/LRT station in?
    # NOTE: `op` is the keyword of the geopandas release this script targets;
    # newer geopandas releases name this argument `predicate`.
    stations_in_zones = sjoin(stations, sg, how="left", op="within").drop(
        "index_right", axis=1)

    # Left merge keeps every station ID; the English station name from the
    # ID file becomes the single `station` column.
    dSM = station_ids.merge(
        stations_in_zones, left_on="mrt_station_english",
        right_on="station", how="left").drop(
        ["station"], axis=1).rename(
        {"mrt_station_english": "station"}, axis=1)

    # Drop stations without a subzone. The filter has to use SUBZONE_N:
    # the `subzone` column only exists after the rename below. This also
    # removes IDs with no coordinates, whose geometry would be missing.
    dSM = dSM[dSM["SUBZONE_N"].notna()].rename(
        {"PLN_AREA_N": "planning_area",
         "SUBZONE_N": "subzone"}, axis=1)

    # Store the coordinates as plain columns so the geometry can be dropped
    # before writing the CSV.
    dSM['LON'] = [pt.x for pt in dSM.geometry]
    dSM['LAT'] = [pt.y for pt in dSM.geometry]

    dSM.drop('geometry', axis=1).to_csv(
        os.path.join(make_data, "mrt_station_subzones.csv"))


###
# get GPS coords in Python. Leave merging to STATA (see 1...)
###

if __name__ == "__main__":
    # Build both lookup CSVs: bus stops first, then MRT stations.
    for build_csv in (getBusSubzones, getMRTSubzones):
        build_csv()
